1 Pre-processing

1.1 Load package

library("RColorBrewer")
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
#library(SeuratWrappers)
library(harmony)
library(EnsDb.Hsapiens.v86)
library(stringr)
library(dplyr)
library(ggplot2)
library(patchwork)
library(kableExtra)
library(tidyverse)

set.seed(123)

1.2 Parameters

# Paths
path_to_obj <- ("~/Documents/multiome_tonsil_Lucia/results/R_objects/14.tonsil_multiome_bcells_without_cluster4_doublets_normalized.rds")

path_to_markers<-("~/Documents/multiome_tonsil_Lucia/results/tables/tonsil_markers_bcell_withoutclust4_075.csv")

1.3 Load data

tonsil_wnn_bcell <- readRDS(path_to_obj)

tonsil_markers_075<-read_csv(path_to_markers)
## New names:
## * `` -> ...1
## Rows: 4815 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): ...1, gene
## dbl (6): p_val, avg_log2FC, pct.1, pct.2, p_val_adj, cluster
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2 UMAP

 DimPlot(
    tonsil_wnn_bcell,
    group.by = "wsnn_res.0.075",
    reduction = "wnn.umap",
    pt.size = 0.1, label = T
  )

2.1 Get top n markers of each cluster

Resolution 0.075

top5_tonsil_markers_075<-tonsil_markers_075 %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC)
top7_tonsil_markers_075<-tonsil_markers_075 %>% group_by(cluster) %>% top_n(n = 7, wt = avg_log2FC)
top10_tonsil_markers_075<-tonsil_markers_075 %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
df_top5<-as.data.frame(top5_tonsil_markers_075)
kbl(df_top5,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 1: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
COL19A1 0 2.3418794 0.799 0.221 0 0 COL19A1
STEAP1B 0 2.1317977 0.521 0.126 0 0 STEAP1B
CD69 0 1.6733680 0.519 0.174 0 0 CD69
CCSER1 0 1.5429622 0.646 0.303 0 0 CCSER1
LIX1-AS1 0 1.4711405 0.270 0.098 0 0 LIX1-AS1
ANK3 0 1.9694612 0.604 0.219 0 1 ANK3
SSPN 0 1.7975224 0.446 0.099 0 1 SSPN
AL355076.2 0 1.7115854 0.500 0.115 0 1 AL355076.2
HIPK2 0 1.6423040 0.379 0.144 0 1 HIPK2
PDE4D 0 1.5809119 0.831 0.564 0 1 PDE4D
HMGB2 0 3.0697315 0.956 0.135 0 2 HMGB2
TUBA1B 0 2.9693211 0.969 0.222 0 2 TUBA1B
H2AFZ 0 2.7980292 0.968 0.233 0 2 H2AFZ
TOP2A 0 2.5647124 0.818 0.019 0 2 TOP2A
HIST1H4C 0 2.5492101 0.854 0.310 0 2 HIST1H4C
MAML31 0 2.7180961 0.837 0.226 0 3 MAML3
AC023590.11 0 2.6018627 0.985 0.274 0 3 AC023590.1
AC104170.1 0 2.5051352 0.821 0.148 0 3 AC104170.1
RAPGEF51 0 2.3028431 0.933 0.233 0 3 RAPGEF5
LHFPL21 0 2.1961632 0.798 0.228 0 3 LHFPL2
IGHGP 0 5.9845041 0.501 0.145 0 4 IGHGP
IGHG1 0 5.8829304 0.638 0.291 0 4 IGHG1
IGHA1 0 6.4993828 0.646 0.424 0 4 IGHA1
IGKC 0 5.9777857 0.961 0.915 0 4 IGKC
IGLC2 0 5.9067244 0.894 0.755 0 4 IGLC2
MT-ND22 0 0.9424789 0.988 0.956 0 5 MT-ND2
ZBTB161 0 0.9873801 0.336 0.076 0 5 ZBTB16
AC105402.32 0 1.0221542 0.848 0.479 0 5 AC105402.3
GAB12 0 0.9870825 0.452 0.178 0 5 GAB1
FKBP51 0 0.9224334 0.572 0.335 0 5 FKBP5
df_top7<-as.data.frame(top7_tonsil_markers_075)
df_mark<-as.data.frame(tonsil_markers_075)
kbl(df_top7,caption = "Table of the top 5 marker of each cluster resolution 0.005") %>%
  kable_paper("striped", full_width = F)
Table 2: Table of the top 5 marker of each cluster resolution 0.005
…1 p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
COL19A1 0 2.3418794 0.799 0.221 0 0 COL19A1
STEAP1B 0 2.1317977 0.521 0.126 0 0 STEAP1B
CD69 0 1.6733680 0.519 0.174 0 0 CD69
CCSER1 0 1.5429622 0.646 0.303 0 0 CCSER1
LIX1-AS1 0 1.4711405 0.270 0.098 0 0 LIX1-AS1
PTPRK 0 1.4264085 0.471 0.130 0 0 PTPRK
GAB1 0 1.3734829 0.305 0.108 0 0 GAB1
ANK3 0 1.9694612 0.604 0.219 0 1 ANK3
SSPN 0 1.7975224 0.446 0.099 0 1 SSPN
AL355076.2 0 1.7115854 0.500 0.115 0 1 AL355076.2
HIPK2 0 1.6423040 0.379 0.144 0 1 HIPK2
PDE4D 0 1.5809119 0.831 0.564 0 1 PDE4D
ZDHHC14 0 1.5635006 0.763 0.400 0 1 ZDHHC14
SYT1 0 1.5510644 0.248 0.034 0 1 SYT1
HMGB2 0 3.0697315 0.956 0.135 0 2 HMGB2
TUBA1B 0 2.9693211 0.969 0.222 0 2 TUBA1B
H2AFZ 0 2.7980292 0.968 0.233 0 2 H2AFZ
TOP2A 0 2.5647124 0.818 0.019 0 2 TOP2A
HIST1H4C 0 2.5492101 0.854 0.310 0 2 HIST1H4C
STMN1 0 2.5179082 0.941 0.090 0 2 STMN1
TUBB 0 2.3594810 0.950 0.176 0 2 TUBB
MAML31 0 2.7180961 0.837 0.226 0 3 MAML3
AC023590.11 0 2.6018627 0.985 0.274 0 3 AC023590.1
AC104170.1 0 2.5051352 0.821 0.148 0 3 AC104170.1
RAPGEF51 0 2.3028431 0.933 0.233 0 3 RAPGEF5
LHFPL21 0 2.1961632 0.798 0.228 0 3 LHFPL2
AFF21 0 2.0659813 0.927 0.255 0 3 AFF2
FGD61 0 2.0458983 0.860 0.226 0 3 FGD6
IGHGP 0 5.9845041 0.501 0.145 0 4 IGHGP
IGHG1 0 5.8829304 0.638 0.291 0 4 IGHG1
IGHG3 0 5.8542993 0.697 0.318 0 4 IGHG3
IGLC1 0 5.8772951 0.827 0.464 0 4 IGLC1
IGHA1 0 6.4993828 0.646 0.424 0 4 IGHA1
IGKC 0 5.9777857 0.961 0.915 0 4 IGKC
IGLC2 0 5.9067244 0.894 0.755 0 4 IGLC2
MT-ND22 0 0.9424789 0.988 0.956 0 5 MT-ND2
ZBTB161 0 0.9873801 0.336 0.076 0 5 ZBTB16
MT-ND12 0 0.9149171 0.988 0.966 0 5 MT-ND1
AC105402.32 0 1.0221542 0.848 0.479 0 5 AC105402.3
GAB12 0 0.9870825 0.452 0.178 0 5 GAB1
ST6GALNAC31 0 0.8516123 0.544 0.234 0 5 ST6GALNAC3
FKBP51 0 0.9224334 0.572 0.335 0 5 FKBP5

2.1.1 Dynamic table top 7

#install.packages("htmlwidgets", type = "binary")
#install.packages("DT", type = "binary")

DT::datatable(df_top7)

2.1.2 Dynamic table of all markers

DT::datatable(df_mark)

2.1.3 Dotplot

markers<-c("BANK1", "FCER2","CD3D", "IL7R","MKI67", "TOP2A","MARCKSL1", "RGS13", "LMO2", "CCDC88A","GNLY", "NKG7", "GZMK", "CD8A",  "FCRL4", "FCRL5", "PLAC8", "SOX5","IGHG1", "IGHA1", "JCHAIN", "XBP1","LYZ", "S100A8","MT2A", "CD3D", "TRAC", "PCNA","CD19","CR2","MS4A1","RALGPS2","CD79A")

markerGenes <- unique(markers)

geneSym <- ifelse(test = !grepl('NA', markerGenes), 
             yes = sub('.*?-', '', markerGenes),
             no = sub('-.*', '', markerGenes))

Idents(tonsil_wnn_bcell) <- "wsnn_res.0.075"

dot <- DotPlot(tonsil_wnn_bcell, features = unique(markers),cols = c("lightgrey", "blue"), cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) + scale_x_discrete(labels= geneSym)+ggtitle("Known markers")

dot +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 5))

Idents(tonsil_wnn_bcell) <- "wsnn_res.0.075"

dot.10 <- DotPlot(tonsil_wnn_bcell, features = unique(top10_tonsil_markers_075$gene),cols =  c("lightgrey", "blue"), cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8)) +ggtitle("res 0.1 top 10 of each cluster")

dot.5 <- DotPlot(tonsil_wnn_bcell, features = unique(top5_tonsil_markers_075$gene),cols =  c("lightgrey", "blue"), cluster.idents = T) + theme(axis.text.x = element_text( size = 10, vjust = 0.8, hjust = 0.8))+ggtitle("res 0.1 top 5 of each cluster")
dot.10 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 5))

dot.5 +
  coord_flip() +
  theme(axis.title = element_blank(), axis.text.y = element_text(size = 7))

3 Markers exploration

top7mark_cluster0<-top7_tonsil_markers_075[["gene"]][1:7]
top7mark_cluster1<-top7_tonsil_markers_075[["gene"]][8:14]
top7mark_cluster2<-top7_tonsil_markers_075[["gene"]][15:21]
top7mark_cluster3<-top7_tonsil_markers_075[["gene"]][22:28]
top7mark_cluster4<-top7_tonsil_markers_075[["gene"]][29:35]
top7mark_cluster5<-top7_tonsil_markers_075[["gene"]][36:42]
markers_gg <- function(x){purrr::map(x, function(x) {
  
  p <- FeaturePlot(
    tonsil_wnn_bcell,
    features = x,
    reduction = "wnn.umap",
    pt.size = 0.1
  )
  p
})}

3.1 Iñaki markers

m<-c("PRDM1","XBP1","IRF4","MEF2B","BCL6")

DZ<-c("SUGCT", "CXCR4", "AICDA")

LZ<- c("CD83","BCL2A1")

GC<- c("MEF2B", "BCL6","IRF4")

PC<- c("PRDM1","SLAMF7", "MZB1", "FKBP11")

3.1.1 Dark Zone

markers_gg(DZ)
## [[1]]

## 
## [[2]]

## 
## [[3]]

3.1.2 Light Zone

markers_gg(LZ)
## [[1]]

## 
## [[2]]

3.1.3 Germinal Center

markers_gg(GC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

3.1.4 Plasma cell

markers_gg(PC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## Pauli markers

naive_mem_bcell<-c("BANK1", "FCER2")
cd4_tcell<-c("CD3D", "IL7R")
dz_gc_bcell<-c("MKI67", "TOP2A")
lz_gc_bcell<-c("MARCKSL1", "RGS13", "LMO2", "CCDC88A")  
cytotoxic<-c("GNLY", "NKG7", "GZMK", "CD8A")
memory_bcell<-c(    "FCRL4", "FCRL5", "PLAC8", "SOX5")
pc<-c("IGHG1", "IGHA1", "JCHAIN", "XBP1")
myeloid<-c("LYZ", "S100A8") 
poor_q_doublets <-c("FDCSP", "CLU", "CXCL13", "CR2")
doublet_proliferative_tcell<-c("MT2A", "CD3D", "TRAC", "PCNA")
Unk<-c("PTPRCAP", "CD37", "CD74")   
PDC<-c("PTCRA", "LILRA4", "IRF7")

TCL1A:memory naive b cells IGHD:Group enriched (naive B-cell, memory B-cell) FCER2: (naive B-cell, memory B-cell) IGHM (naive B-cell, memory B-cell) IL4R:naive B-cell

markers_gg(c("TCL1A","IGHD","FCER2","IGHM","IL4R"))
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

markers_gg(naive_mem_bcell)
## [[1]]

## 
## [[2]]

markers_gg(memory_bcell)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(dz_gc_bcell)
## [[1]]

## 
## [[2]]

markers_gg(lz_gc_bcell)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(pc)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

markers_gg(poor_q_doublets)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

Proliferating cell nuclear antigen (PCNA)

markers_gg(Unk)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(PDC)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg( "MYO1E")
## [[1]]

3.2 Top 7 of each cluster

3.2.1 Cluster 0:

markers_gg("COL4A3")
## [[1]]

FCRL4 is an immunoregulatory receptor that belongs to the Fc receptor-like (FCRL) family. In healthy individuals, FCRL4 is specifically expressed by memory B cells (MBCs) localized in sub-epithelial regions of lymphoid tissues. Expansion of FCRL4+ B cells has been observed in blood and other tissues in various infectious and autoimmune disorders ptprk: naive-memory

markers_gg(top7mark_cluster0)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

naive_markers<-c("CD79A", "CD79B", "BLNK")
memory_markers<-c("CD27")
markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(memory_markers)
## [[1]]

markers_gg(c("MS4A1","NT5E"))
## [[1]]

## 
## [[2]]

MS4A1:NAIVE-MEMORY B CELL NT5E: NAIVE B CELL

3.2.2 Cluster 1:

markers_gg(top7mark_cluster1)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.3 Cluster 2:

markers_gg(top7mark_cluster2)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.4 Cluster 3:

markers_gg(top7mark_cluster3)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

INPP4B: Immune cell enhanced (memory CD4 T-cell)

3.2.5 Cluster 4

AOAH: NK GNLY:NK

markers_gg(top7mark_cluster4)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.2.6 Cluster 5

markers_gg(top7mark_cluster5)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

3.3 Markers FOXO1, NFKB1

 p <- FeaturePlot(
    tonsil_wnn_bcell,
    features = c("NFKB1","FOXO1"),
    reduction = "wnn.umap",
    pt.size = 0.1
  )
p

4 Mitocondrial

FeaturePlot(
    tonsil_wnn_bcell,
    features = "percent.mt",
    reduction = "wnn.umap",
    pt.size = 0.1
  )

VlnPlot(tonsil_wnn_bcell, features = "percent.mt", group.by = "wsnn_res.0.075", pt.size=0)

VlnPlot(tonsil_wnn_bcell, features = "percent_ribo", group.by = "wsnn_res.0.075", pt.size=0)

weight.vp<- VlnPlot(tonsil_wnn_bcell, features = "RNA.weight", group.by = "wsnn_res.0.075",pt.size = 0)
p.weight.vp<- VlnPlot(tonsil_wnn_bcell, features = "peaks.weight", group.by = "wsnn_res.0.075",pt.size = 0)

weight.vp + ggtitle("RNA modality weight")

p.weight.vp+ ggtitle("ATAC modality weight")

5 Number of cell in each cluster

Idents(tonsil_wnn_bcell) <- "wsnn_res.0.075"

cell.num <- table(Idents(tonsil_wnn_bcell))
cell.num
## 
##     0     1     2     3     4     5 
## 15538 11029  7224  6704  1865   250

6 Rename clusters

new.cluster.ids <- c("MBC", "NBC","GC/DZ", "GC/LZ", "GC/PC", "PC")
names(new.cluster.ids) <- levels(tonsil_wnn_bcell)
tonsil_wnn_bcell <- RenameIdents(tonsil_wnn_bcell, new.cluster.ids)
DimPlot(tonsil_wnn_bcell, reduction = "wnn.umap", label = TRUE, pt.size = 0.5) 

7 Bibliography markers

MARKERS

Immature B cells express CD19, CD 20, CD34, CD38, and CD45R, T-cell receptor/CD3 complex (TCR/CD3 complex)

  • T-cells (identified by high expression of CD3D and CD3E).
  • monocytes (identified by high expression of LYZ and S100A8).
  • naive B-cells (identified by high expression of CD79A, CD79B and BLNK).
  • plasma cells (identified by B-cell and proliferation markers, such as TOP2A or MKI67).
  • poor-quality cells (identified by high mitochondrial expression). If a cell has pores in the membrane due to cell lysis, the cytosolic mRNA will leak out of the cell; but if the diameter of mitochondria is higher than the pores, they will get trapped inside the cell.
canonical_bcell_markers <-c("CD34", "CD38", "CD19")

monocytes_markers<-c("LYZ","S100A8")

naive_markers<-c("CD79A", "CD79B", "BLNK")

bib_Bcell_markers<-c("CD19","CR2","MS4A1","RALGPS2","CD79A")
bib_Tcell_markers<-c("CD3E","CD4","CD8A","FOXP3","IL17A")

7.1 B cells

markers_gg(naive_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

markers_gg(bib_Bcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

7.2 T-cells

CD8+ T cell markers:“CD3D”, “CD8A” NK cell markers:“GNLY”, “NKG7”

markers_gg(bib_Tcell_markers)
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

DimPlot(tonsil_wnn_bcell,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T, group.by = "age_group")

8 CellCycleScoring

DimPlot(tonsil_wnn_bcell,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T, group.by = "Phase")

DimPlot(tonsil_wnn_bcell,
    reduction = "wnn.umap",
    pt.size = 0.1, label = T)